{
 "cells": [
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "import optuna\n",
    "import pandas as pd\n",
    "import numpy as np\n",
    "from sklearn.preprocessing import MinMaxScaler\n",
    "from sklearn.metrics import confusion_matrix\n",
    "from sklearn.neighbors import KNeighborsClassifier\n",
    "from sklearn.tree import DecisionTreeClassifier\n",
    "from sklearn.ensemble import RandomForestClassifier\n",
    "from sklearn.svm import SVC\n",
    "from sklearn.preprocessing import MinMaxScaler"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {},
   "outputs": [],
   "source": [
    "def specificity_score(y_true, y_pred):\n",
    "    tn, fp, fn, tp = confusion_matrix(y_true, y_pred).ravel()\n",
    "    return tn / (tn + fp) if (tn + fp) > 0 else 0\n",
    "\n",
    "def evaluate_model_on_folds(model, folds=5, prefix=\"data/fold_\", disp=0):\n",
    "    metrics = {\n",
    "        'f1': [],\n",
    "        'acc': [],\n",
    "        'recall': [],\n",
    "        'specificity': [],\n",
    "        'tp': [],\n",
    "        'fp': [],\n",
    "        'tn': [],\n",
    "        'fn': [],\n",
    "        'conf_matrices': []\n",
    "    }\n",
    "\n",
    "    total_conf_matrix = np.zeros((2, 2), dtype=int)\n",
    "    for i in range(folds):\n",
    "        train = pd.read_csv(f\"{prefix}{i+1}_train.csv\")\n",
    "        test = pd.read_csv(f\"{prefix}{i+1}_test.csv\")\n",
    "        \n",
    "        test['arrytmia'] = test['arrytmia'].apply(lambda x: 1 if x > 1 else x)\n",
    "        train['arrytmia'] = train['arrytmia'].apply(lambda x: 1 if x > 1 else x)\n",
    "\n",
    "        scaler = MinMaxScaler()\n",
    "        train = pd.DataFrame(scaler.fit_transform(train), columns=train.columns)\n",
    "\n",
    "        test = pd.DataFrame(scaler.transform(test), columns=test.columns)\n",
    "\n",
    "        X_train = train.iloc[: ,1:].values\n",
    "        y_train = train['arrytmia'].values\n",
    "\n",
    "        X_test = test.iloc[: ,1:].values\n",
    "        y_test = test['arrytmia'].values\n",
    "        \n",
    "        model.fit(X_train, y_train)\n",
    "        y_pred = model.predict(X_test)\n",
    "        \n",
    "        cm = confusion_matrix(y_test, y_pred)\n",
    "        total_conf_matrix += cm\n",
    "\n",
    "    # Rozpakowanie łącznej macierzy\n",
    "    tn, fp, fn, tp = total_conf_matrix.ravel()\n",
    "\n",
    "    accuracy = (tp + tn) / (tp + tn + fp + fn)\n",
    "\n",
    "    precision=  tp / (tp+fp)if (tp+fp) > 0 else 0\n",
    "    sensitivity = tp / (tp + fn) if (tp + fn) > 0 else 0\n",
    "    specificity = tn / (tn + fp) if (tn + fp) > 0 else 0\n",
    "    F1 = 2 * tp / (2 * tp + fp + fn) if (2 * tp + fp + fn) > 0 else 0\n",
    "\n",
    "    if disp:\n",
    "        print(f\"F1:           {F1:.4f}\")\n",
    "        print(f\"Accuracy:     {accuracy:.4f}\")\n",
    "        print(f\"Precision:    {precision:.4f}\")\n",
    "        print(f\"Sensitivity:  {sensitivity:.4f}\")\n",
    "        print(f\"Specificity:  {specificity:.4f}\")\n",
    "        print(f\"TP:           {tp}\")\n",
    "        print(f\"FP:           {fp}\")\n",
    "        print(f\"TN:           {tn}\")\n",
    "        print(f\"FN:           {fn}\")\n",
    "        print(\"         Pred 1    Pred 0\")\n",
    "        print(f\"True 1    {tp:4}     {fn:4}\")\n",
    "        print(f\"True 0    {fp:4}     {tn:4}\")\n",
    "\n",
    "    return F1"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## KNN"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {},
   "outputs": [],
   "source": [
    "n_trials=20"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[I 2025-04-13 22:37:24,189] A new study created in memory with name: no-name-2e913b86-9365-4a6f-a266-8899025dd2d5\n",
      "[I 2025-04-13 22:37:32,987] Trial 0 finished with value: 0.9658 and parameters: {'n_neighbors': 184, 'weights': 'distance', 'algorithm': 'brute', 'metric': 'manhattan'}. Best is trial 0 with value: 0.9658.\n",
      "[I 2025-04-13 22:37:45,398] Trial 1 finished with value: 0.9728 and parameters: {'n_neighbors': 25, 'weights': 'distance', 'algorithm': 'kd_tree', 'metric': 'manhattan'}. Best is trial 1 with value: 0.9728.\n",
      "[I 2025-04-13 22:38:00,776] Trial 2 finished with value: 0.9692 and parameters: {'n_neighbors': 78, 'weights': 'distance', 'algorithm': 'kd_tree', 'metric': 'manhattan'}. Best is trial 1 with value: 0.9728.\n",
      "[I 2025-04-13 22:38:06,224] Trial 3 finished with value: 0.9667 and parameters: {'n_neighbors': 181, 'weights': 'uniform', 'algorithm': 'auto', 'metric': 'euclidean'}. Best is trial 1 with value: 0.9728.\n",
      "[I 2025-04-13 22:38:15,666] Trial 4 finished with value: 0.9728 and parameters: {'n_neighbors': 30, 'weights': 'uniform', 'algorithm': 'kd_tree', 'metric': 'euclidean'}. Best is trial 1 with value: 0.9728.\n",
      "[I 2025-04-13 22:38:24,078] Trial 5 finished with value: 0.9688 and parameters: {'n_neighbors': 74, 'weights': 'uniform', 'algorithm': 'auto', 'metric': 'manhattan'}. Best is trial 1 with value: 0.9728.\n",
      "[I 2025-04-13 22:38:47,705] Trial 6 finished with value: 0.9731 and parameters: {'n_neighbors': 22, 'weights': 'uniform', 'algorithm': 'ball_tree', 'metric': 'manhattan'}. Best is trial 6 with value: 0.9731.\n",
      "[I 2025-04-13 22:38:52,357] Trial 7 finished with value: 0.9675 and parameters: {'n_neighbors': 185, 'weights': 'distance', 'algorithm': 'auto', 'metric': 'euclidean'}. Best is trial 6 with value: 0.9731.\n",
      "[I 2025-04-13 22:39:05,021] Trial 8 finished with value: 0.968 and parameters: {'n_neighbors': 160, 'weights': 'distance', 'algorithm': 'kd_tree', 'metric': 'euclidean'}. Best is trial 6 with value: 0.9731.\n",
      "[I 2025-04-13 22:39:13,654] Trial 9 finished with value: 0.9659 and parameters: {'n_neighbors': 144, 'weights': 'uniform', 'algorithm': 'auto', 'metric': 'manhattan'}. Best is trial 6 with value: 0.9731.\n",
      "[I 2025-04-13 22:39:34,220] Trial 10 finished with value: 0.9753 and parameters: {'n_neighbors': 8, 'weights': 'uniform', 'algorithm': 'ball_tree', 'metric': 'manhattan'}. Best is trial 10 with value: 0.9753.\n",
      "[I 2025-04-13 22:39:55,109] Trial 11 finished with value: 0.9749 and parameters: {'n_neighbors': 3, 'weights': 'uniform', 'algorithm': 'ball_tree', 'metric': 'manhattan'}. Best is trial 10 with value: 0.9753.\n",
      "[I 2025-04-13 22:40:16,762] Trial 12 finished with value: 0.9748 and parameters: {'n_neighbors': 4, 'weights': 'uniform', 'algorithm': 'ball_tree', 'metric': 'manhattan'}. Best is trial 10 with value: 0.9753.\n",
      "[I 2025-04-13 22:40:41,018] Trial 13 finished with value: 0.9704 and parameters: {'n_neighbors': 49, 'weights': 'uniform', 'algorithm': 'ball_tree', 'metric': 'manhattan'}. Best is trial 10 with value: 0.9753.\n",
      "[I 2025-04-13 22:41:07,726] Trial 14 finished with value: 0.9669 and parameters: {'n_neighbors': 110, 'weights': 'uniform', 'algorithm': 'ball_tree', 'metric': 'manhattan'}. Best is trial 10 with value: 0.9753.\n",
      "[I 2025-04-13 22:41:26,788] Trial 15 finished with value: 0.9718 and parameters: {'n_neighbors': 1, 'weights': 'uniform', 'algorithm': 'ball_tree', 'metric': 'manhattan'}. Best is trial 10 with value: 0.9753.\n",
      "[I 2025-04-13 22:41:35,188] Trial 16 finished with value: 0.9693 and parameters: {'n_neighbors': 66, 'weights': 'uniform', 'algorithm': 'brute', 'metric': 'manhattan'}. Best is trial 10 with value: 0.9753.\n",
      "[I 2025-04-13 22:42:02,183] Trial 17 finished with value: 0.9669 and parameters: {'n_neighbors': 110, 'weights': 'uniform', 'algorithm': 'ball_tree', 'metric': 'manhattan'}. Best is trial 10 with value: 0.9753.\n",
      "[I 2025-04-13 22:42:26,707] Trial 18 finished with value: 0.9714 and parameters: {'n_neighbors': 47, 'weights': 'uniform', 'algorithm': 'ball_tree', 'metric': 'euclidean'}. Best is trial 10 with value: 0.9753.\n",
      "[I 2025-04-13 22:42:54,020] Trial 19 finished with value: 0.9675 and parameters: {'n_neighbors': 93, 'weights': 'uniform', 'algorithm': 'ball_tree', 'metric': 'manhattan'}. Best is trial 10 with value: 0.9753.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Najlepsze parametry: {'n_neighbors': 8, 'weights': 'uniform', 'algorithm': 'ball_tree', 'metric': 'manhattan'}\n"
     ]
    }
   ],
   "source": [
    "def objective(trial):\n",
    "    n_neighbors = trial.suggest_int('n_neighbors', 1, 200) # Liczba sąsiadów\n",
    "    weights = trial.suggest_categorical('weights', ['uniform', 'distance']) #Sposób ważenia sąsiadów\n",
    "    algorithm = trial.suggest_categorical('algorithm', ['auto', 'ball_tree', 'kd_tree', 'brute'])\n",
    "    metric=trial.suggest_categorical('metric',['euclidean','manhattan'])\n",
    "    \n",
    "    model = KNeighborsClassifier(\n",
    "        n_neighbors=n_neighbors,\n",
    "        weights=weights,\n",
    "        algorithm=algorithm,\n",
    "        metric=metric,\n",
    "        n_jobs=-1\n",
    "    )\n",
    "\n",
    "\n",
    "    score=evaluate_model_on_folds(model)\n",
    "    return round(score,4) \n",
    "\n",
    "study = optuna.create_study(direction='maximize')\n",
    "study.optimize(objective, n_trials=n_trials)\n",
    "\n",
    "best_params = study.best_params\n",
    "print(\"Najlepsze parametry:\", best_params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "F1:           0.9753\n",
      "Accuracy:     0.9627\n",
      "Precision:    0.9656\n",
      "Sensitivity:  0.9853\n",
      "Specificity:  0.8960\n",
      "TP:           47675\n",
      "FP:           1700\n",
      "TN:           14640\n",
      "FN:           712\n",
      "         Pred 1    Pred 0\n",
      "True 1    47675      712\n",
      "True 0    1700     14640\n"
     ]
    }
   ],
   "source": [
    "best_model = KNeighborsClassifier(**best_params)\n",
    "score=evaluate_model_on_folds(best_model,disp=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## DecisionTree"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[I 2025-04-13 22:43:16,247] A new study created in memory with name: no-name-65ba793c-fa6f-4d3f-967a-272cc1abee2e\n",
      "[I 2025-04-13 22:43:17,580] Trial 0 finished with value: 0.939 and parameters: {'criterion': 'entropy', 'splitter': 'best', 'max_depth': 4, 'min_samples_split': 13, 'min_samples_leaf': 14, 'max_features': 'log2'}. Best is trial 0 with value: 0.939.\n",
      "[I 2025-04-13 22:43:18,617] Trial 1 finished with value: 0.943 and parameters: {'criterion': 'gini', 'splitter': 'random', 'max_depth': 5, 'min_samples_split': 12, 'min_samples_leaf': 19, 'max_features': None}. Best is trial 1 with value: 0.943.\n",
      "[I 2025-04-13 22:43:25,109] Trial 2 finished with value: 0.9666 and parameters: {'criterion': 'gini', 'splitter': 'best', 'max_depth': 12, 'min_samples_split': 17, 'min_samples_leaf': 7, 'max_features': None}. Best is trial 2 with value: 0.9666.\n",
      "[I 2025-04-13 22:43:26,046] Trial 3 finished with value: 0.8862 and parameters: {'criterion': 'log_loss', 'splitter': 'random', 'max_depth': 6, 'min_samples_split': 7, 'min_samples_leaf': 16, 'max_features': 'log2'}. Best is trial 2 with value: 0.9666.\n",
      "[I 2025-04-13 22:43:27,332] Trial 4 finished with value: 0.9353 and parameters: {'criterion': 'entropy', 'splitter': 'best', 'max_depth': 3, 'min_samples_split': 20, 'min_samples_leaf': 8, 'max_features': 'sqrt'}. Best is trial 2 with value: 0.9666.\n",
      "[I 2025-04-13 22:43:28,417] Trial 5 finished with value: 0.9536 and parameters: {'criterion': 'gini', 'splitter': 'random', 'max_depth': 7, 'min_samples_split': 8, 'min_samples_leaf': 13, 'max_features': None}. Best is trial 2 with value: 0.9666.\n",
      "[I 2025-04-13 22:43:29,911] Trial 6 finished with value: 0.9547 and parameters: {'criterion': 'gini', 'splitter': 'best', 'max_depth': 8, 'min_samples_split': 20, 'min_samples_leaf': 7, 'max_features': 'log2'}. Best is trial 2 with value: 0.9666.\n",
      "[I 2025-04-13 22:43:31,082] Trial 7 finished with value: 0.9618 and parameters: {'criterion': 'gini', 'splitter': 'random', 'max_depth': 12, 'min_samples_split': 8, 'min_samples_leaf': 14, 'max_features': None}. Best is trial 2 with value: 0.9666.\n",
      "[I 2025-04-13 22:43:32,327] Trial 8 finished with value: 0.9353 and parameters: {'criterion': 'entropy', 'splitter': 'best', 'max_depth': 3, 'min_samples_split': 4, 'min_samples_leaf': 6, 'max_features': 'sqrt'}. Best is trial 2 with value: 0.9666.\n",
      "[I 2025-04-13 22:43:33,282] Trial 9 finished with value: 0.9198 and parameters: {'criterion': 'log_loss', 'splitter': 'random', 'max_depth': 3, 'min_samples_split': 2, 'min_samples_leaf': 19, 'max_features': None}. Best is trial 2 with value: 0.9666.\n",
      "[I 2025-04-13 22:43:40,352] Trial 10 finished with value: 0.9658 and parameters: {'criterion': 'gini', 'splitter': 'best', 'max_depth': 16, 'min_samples_split': 16, 'min_samples_leaf': 4, 'max_features': None}. Best is trial 2 with value: 0.9666.\n",
      "[I 2025-04-13 22:43:47,969] Trial 11 finished with value: 0.9649 and parameters: {'criterion': 'gini', 'splitter': 'best', 'max_depth': 18, 'min_samples_split': 16, 'min_samples_leaf': 2, 'max_features': None}. Best is trial 2 with value: 0.9666.\n",
      "[I 2025-04-13 22:43:54,985] Trial 12 finished with value: 0.965 and parameters: {'criterion': 'gini', 'splitter': 'best', 'max_depth': 15, 'min_samples_split': 16, 'min_samples_leaf': 1, 'max_features': None}. Best is trial 2 with value: 0.9666.\n",
      "[I 2025-04-13 22:44:02,009] Trial 13 finished with value: 0.9659 and parameters: {'criterion': 'gini', 'splitter': 'best', 'max_depth': 20, 'min_samples_split': 16, 'min_samples_leaf': 5, 'max_features': None}. Best is trial 2 with value: 0.9666.\n",
      "[I 2025-04-13 22:44:08,585] Trial 14 finished with value: 0.9674 and parameters: {'criterion': 'gini', 'splitter': 'best', 'max_depth': 20, 'min_samples_split': 18, 'min_samples_leaf': 10, 'max_features': None}. Best is trial 14 with value: 0.9674.\n",
      "[I 2025-04-13 22:44:10,536] Trial 15 finished with value: 0.9621 and parameters: {'criterion': 'log_loss', 'splitter': 'best', 'max_depth': 13, 'min_samples_split': 18, 'min_samples_leaf': 10, 'max_features': 'sqrt'}. Best is trial 14 with value: 0.9674.\n",
      "[I 2025-04-13 22:44:17,262] Trial 16 finished with value: 0.9674 and parameters: {'criterion': 'gini', 'splitter': 'best', 'max_depth': 20, 'min_samples_split': 13, 'min_samples_leaf': 10, 'max_features': None}. Best is trial 14 with value: 0.9674.\n",
      "[I 2025-04-13 22:44:23,838] Trial 17 finished with value: 0.9674 and parameters: {'criterion': 'gini', 'splitter': 'best', 'max_depth': 20, 'min_samples_split': 13, 'min_samples_leaf': 10, 'max_features': None}. Best is trial 14 with value: 0.9674.\n",
      "[I 2025-04-13 22:44:25,633] Trial 18 finished with value: 0.9633 and parameters: {'criterion': 'log_loss', 'splitter': 'best', 'max_depth': 17, 'min_samples_split': 11, 'min_samples_leaf': 12, 'max_features': 'log2'}. Best is trial 14 with value: 0.9674.\n",
      "[I 2025-04-13 22:44:27,496] Trial 19 finished with value: 0.9596 and parameters: {'criterion': 'entropy', 'splitter': 'best', 'max_depth': 10, 'min_samples_split': 14, 'min_samples_leaf': 9, 'max_features': 'sqrt'}. Best is trial 14 with value: 0.9674.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Najlepsze parametry: {'criterion': 'gini', 'splitter': 'best', 'max_depth': 20, 'min_samples_split': 18, 'min_samples_leaf': 10, 'max_features': None}\n"
     ]
    }
   ],
   "source": [
    "def objective(trial):\n",
    "    criterion = trial.suggest_categorical('criterion', ['gini', 'entropy','log_loss']) \n",
    "    splitter = trial.suggest_categorical('splitter', ['best', 'random'])\n",
    "    max_depth = trial.suggest_int('max_depth', 3, 20) \n",
    "    min_samples_split = trial.suggest_int('min_samples_split', 2, 20)\n",
    "    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 20)\n",
    "    max_features = trial.suggest_categorical('max_features', ['sqrt', 'log2', None])\n",
    "    #DODAJ CCP_ALPHA\n",
    "\n",
    "    model = DecisionTreeClassifier(\n",
    "        criterion=criterion,\n",
    "        max_depth=max_depth,\n",
    "        min_samples_split=min_samples_split,\n",
    "        min_samples_leaf=min_samples_leaf,\n",
    "        max_features=max_features,\n",
    "        splitter=splitter,\n",
    "        random_state=42\n",
    "    )\n",
    "\n",
    "\n",
    "    score=evaluate_model_on_folds(model)\n",
    "    return round(score,4)  \n",
    "\n",
    "study = optuna.create_study(direction='maximize')\n",
    "study.optimize(objective, n_trials=n_trials)\n",
    "\n",
    "best_params = study.best_params\n",
    "print(\"Najlepsze parametry:\", best_params)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "F1:           0.9674\n",
      "Accuracy:     0.9513\n",
      "Precision:    0.9674\n",
      "Sensitivity:  0.9674\n",
      "Specificity:  0.9034\n",
      "TP:           46810\n",
      "FP:           1578\n",
      "TN:           14762\n",
      "FN:           1577\n",
      "         Pred 1    Pred 0\n",
      "True 1    46810     1577\n",
      "True 0    1578     14762\n"
     ]
    }
   ],
   "source": [
    "best_model = DecisionTreeClassifier(**best_params, random_state=42)\n",
    "score=evaluate_model_on_folds(best_model,disp=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## RandomForestClassifier"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[I 2025-04-13 22:44:34,014] A new study created in memory with name: no-name-2241ecb4-c471-4a91-9b52-d6bc72d24f3b\n",
      "[I 2025-04-13 22:45:10,184] Trial 0 finished with value: 0.9772 and parameters: {'n_estimators': 59, 'criterion': 'entropy', 'max_depth': 33, 'min_samples_split': 12, 'min_samples_leaf': 9, 'max_features': 'log2'}. Best is trial 0 with value: 0.9772.\n",
      "[I 2025-04-13 22:46:11,677] Trial 1 finished with value: 0.9787 and parameters: {'n_estimators': 80, 'criterion': 'entropy', 'max_depth': 48, 'min_samples_split': 9, 'min_samples_leaf': 1, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9787.\n",
      "[I 2025-04-13 22:48:29,540] Trial 2 finished with value: 0.9777 and parameters: {'n_estimators': 188, 'criterion': 'gini', 'max_depth': 24, 'min_samples_split': 10, 'min_samples_leaf': 4, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9787.\n",
      "[I 2025-04-13 22:50:52,282] Trial 3 finished with value: 0.9771 and parameters: {'n_estimators': 194, 'criterion': 'log_loss', 'max_depth': 20, 'min_samples_split': 13, 'min_samples_leaf': 12, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9787.\n",
      "[I 2025-04-13 22:51:46,717] Trial 4 finished with value: 0.9781 and parameters: {'n_estimators': 83, 'criterion': 'entropy', 'max_depth': 42, 'min_samples_split': 10, 'min_samples_leaf': 1, 'max_features': 'log2'}. Best is trial 1 with value: 0.9787.\n",
      "[I 2025-04-13 22:52:38,797] Trial 5 finished with value: 0.9774 and parameters: {'n_estimators': 81, 'criterion': 'gini', 'max_depth': 50, 'min_samples_split': 12, 'min_samples_leaf': 3, 'max_features': 'log2'}. Best is trial 1 with value: 0.9787.\n",
      "[I 2025-04-13 22:54:44,771] Trial 6 finished with value: 0.9765 and parameters: {'n_estimators': 196, 'criterion': 'log_loss', 'max_depth': 39, 'min_samples_split': 5, 'min_samples_leaf': 14, 'max_features': 'log2'}. Best is trial 1 with value: 0.9787.\n",
      "[I 2025-04-13 22:55:32,960] Trial 7 finished with value: 0.9778 and parameters: {'n_estimators': 71, 'criterion': 'gini', 'max_depth': 23, 'min_samples_split': 6, 'min_samples_leaf': 2, 'max_features': 'log2'}. Best is trial 1 with value: 0.9787.\n",
      "[I 2025-04-13 22:57:25,379] Trial 8 finished with value: 0.9769 and parameters: {'n_estimators': 169, 'criterion': 'log_loss', 'max_depth': 18, 'min_samples_split': 18, 'min_samples_leaf': 10, 'max_features': 'log2'}. Best is trial 1 with value: 0.9787.\n",
      "[I 2025-04-13 22:58:14,963] Trial 9 finished with value: 0.9761 and parameters: {'n_estimators': 77, 'criterion': 'log_loss', 'max_depth': 48, 'min_samples_split': 17, 'min_samples_leaf': 15, 'max_features': 'log2'}. Best is trial 1 with value: 0.9787.\n",
      "[I 2025-04-13 22:59:44,595] Trial 10 finished with value: 0.9752 and parameters: {'n_estimators': 123, 'criterion': 'entropy', 'max_depth': 11, 'min_samples_split': 3, 'min_samples_leaf': 18, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9787.\n",
      "[I 2025-04-13 23:01:32,009] Trial 11 finished with value: 0.9779 and parameters: {'n_estimators': 108, 'criterion': 'entropy', 'max_depth': 42, 'min_samples_split': 8, 'min_samples_leaf': 6, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9787.\n",
      "[I 2025-04-13 23:03:39,343] Trial 12 finished with value: 0.9785 and parameters: {'n_estimators': 100, 'criterion': 'entropy', 'max_depth': 34, 'min_samples_split': 15, 'min_samples_leaf': 1, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9787.\n",
      "[I 2025-04-13 23:04:59,208] Trial 13 finished with value: 0.9782 and parameters: {'n_estimators': 105, 'criterion': 'entropy', 'max_depth': 30, 'min_samples_split': 15, 'min_samples_leaf': 6, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9787.\n",
      "[I 2025-04-13 23:06:55,249] Trial 14 finished with value: 0.9781 and parameters: {'n_estimators': 149, 'criterion': 'entropy', 'max_depth': 34, 'min_samples_split': 20, 'min_samples_leaf': 6, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9787.\n",
      "[I 2025-04-13 23:07:20,599] Trial 15 finished with value: 0.9576 and parameters: {'n_estimators': 50, 'criterion': 'entropy', 'max_depth': 5, 'min_samples_split': 15, 'min_samples_leaf': 1, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9787.\n",
      "[I 2025-04-13 23:09:19,011] Trial 16 finished with value: 0.9779 and parameters: {'n_estimators': 100, 'criterion': 'entropy', 'max_depth': 39, 'min_samples_split': 9, 'min_samples_leaf': 8, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9787.\n",
      "[I 2025-04-13 23:11:03,036] Trial 17 finished with value: 0.9783 and parameters: {'n_estimators': 134, 'criterion': 'entropy', 'max_depth': 45, 'min_samples_split': 14, 'min_samples_leaf': 4, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9787.\n",
      "[I 2025-04-13 23:12:35,689] Trial 18 finished with value: 0.9759 and parameters: {'n_estimators': 122, 'criterion': 'entropy', 'max_depth': 34, 'min_samples_split': 7, 'min_samples_leaf': 20, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9787.\n",
      "[I 2025-04-13 23:13:48,766] Trial 19 finished with value: 0.9769 and parameters: {'n_estimators': 94, 'criterion': 'gini', 'max_depth': 27, 'min_samples_split': 3, 'min_samples_leaf': 7, 'max_features': 'sqrt'}. Best is trial 1 with value: 0.9787.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Najlepsze parametry: {'n_estimators': 80, 'criterion': 'entropy', 'max_depth': 48, 'min_samples_split': 9, 'min_samples_leaf': 1, 'max_features': 'sqrt'}\n"
     ]
    }
   ],
   "source": [
    "def objective(trial):\n",
    "    n_estimators = trial.suggest_int('n_estimators', 50, 200) # liczba drzew w lesie\n",
    "    criterion = trial.suggest_categorical('criterion', ['gini', 'entropy', 'log_loss'])\n",
    "    max_depth = trial.suggest_int('max_depth', 5, 50) # Maksymalna głebokość drzewa\n",
    "    min_samples_split = trial.suggest_int('min_samples_split', 2, 20) # Minimalna liczba próbek w węźle wymagana do podzielenia\n",
    "    min_samples_leaf = trial.suggest_int('min_samples_leaf', 1, 20) # minimalna liczba próbek w liściu\n",
    "    max_features = trial.suggest_categorical('max_features', ['sqrt', 'log2']) # Liczba cech rozważanych podczas rodzielania węzła\n",
    "    \n",
    "    model = RandomForestClassifier(\n",
    "        criterion=criterion,\n",
    "        n_estimators=n_estimators,\n",
    "        max_depth=max_depth,\n",
    "        min_samples_split=min_samples_split,\n",
    "        min_samples_leaf=min_samples_leaf,\n",
    "        max_features=max_features,\n",
    "        random_state=42\n",
    "    )\n",
    "\n",
    "    \n",
    "    score=evaluate_model_on_folds(model)\n",
    "    return round(score,4)   \n",
    "\n",
    "study = optuna.create_study(direction='maximize')\n",
    "study.optimize(objective, n_trials=n_trials)\n",
    "\n",
    "best_params = study.best_params\n",
    "print(\"Najlepsze parametry:\", best_params)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "F1:           0.9787\n",
      "Accuracy:     0.9679\n",
      "Precision:    0.9717\n",
      "Sensitivity:  0.9858\n",
      "Specificity:  0.9150\n",
      "TP:           47700\n",
      "FP:           1389\n",
      "TN:           14951\n",
      "FN:           687\n",
      "         Pred 1    Pred 0\n",
      "True 1    47700      687\n",
      "True 0    1389     14951\n"
     ]
    }
   ],
   "source": [
    "best_model = RandomForestClassifier(**best_params, random_state=42)\n",
    "score=evaluate_model_on_folds(best_model,disp=1)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## SVM"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "[I 2025-04-13 23:14:53,212] A new study created in memory with name: no-name-8cfcef7d-c57d-468b-95d0-8e244fb9eccb\n",
      "[I 2025-04-13 23:18:29,592] Trial 0 finished with value: 0.8555 and parameters: {'C': 1.2770386725976239e-05, 'kernel': 'sigmoid', 'gamma': 'scale'}. Best is trial 0 with value: 0.8555.\n",
      "[I 2025-04-13 23:20:11,291] Trial 1 finished with value: 0.9675 and parameters: {'C': 135.884689711956, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 1 with value: 0.9675.\n",
      "[I 2025-04-13 23:24:06,309] Trial 2 finished with value: 0.9244 and parameters: {'C': 0.011970328441774924, 'kernel': 'rbf', 'gamma': 'auto'}. Best is trial 1 with value: 0.9675.\n",
      "[I 2025-04-13 23:30:55,127] Trial 3 finished with value: 0.7108 and parameters: {'C': 0.04141499356569666, 'kernel': 'sigmoid', 'gamma': 'scale'}. Best is trial 1 with value: 0.9675.\n",
      "[I 2025-04-13 23:31:49,844] Trial 4 finished with value: 0.9628 and parameters: {'C': 0.947261534801931, 'kernel': 'linear', 'gamma': 'scale'}. Best is trial 1 with value: 0.9675.\n",
      "[I 2025-04-13 23:32:43,526] Trial 5 finished with value: 0.9814 and parameters: {'C': 989.5013721101076, 'kernel': 'rbf', 'gamma': 'auto'}. Best is trial 5 with value: 0.9814.\n",
      "[I 2025-04-13 23:36:41,995] Trial 6 finished with value: 0.8917 and parameters: {'C': 0.006214042918248762, 'kernel': 'rbf', 'gamma': 'auto'}. Best is trial 5 with value: 0.9814.\n",
      "[I 2025-04-13 23:38:20,503] Trial 7 finished with value: 0.9676 and parameters: {'C': 133.60274032463707, 'kernel': 'linear', 'gamma': 'auto'}. Best is trial 5 with value: 0.9814.\n",
      "[I 2025-04-13 23:40:46,777] Trial 8 finished with value: 0.9496 and parameters: {'C': 0.30715337324737013, 'kernel': 'rbf', 'gamma': 'auto'}. Best is trial 5 with value: 0.9814.\n",
      "[I 2025-04-13 23:42:08,322] Trial 9 finished with value: 0.8712 and parameters: {'C': 3381.0358499177346, 'kernel': 'sigmoid', 'gamma': 'scale'}. Best is trial 5 with value: 0.9814.\n",
      "[I 2025-04-13 23:43:29,067] Trial 10 finished with value: 0.8939 and parameters: {'C': 78982.04592910482, 'kernel': 'poly', 'gamma': 'auto', 'degree': 5, 'coef0': -0.26040961976504273}. Best is trial 5 with value: 0.9814.\n",
      "[I 2025-04-13 23:44:52,167] Trial 11 finished with value: 0.9675 and parameters: {'C': 95.05609185368571, 'kernel': 'linear', 'gamma': 'auto'}. Best is trial 5 with value: 0.9814.\n",
      "[I 2025-04-13 23:45:27,296] Trial 12 finished with value: 0.9771 and parameters: {'C': 93.18946882073548, 'kernel': 'poly', 'gamma': 'auto', 'degree': 2, 'coef0': 0.9175447769092318}. Best is trial 5 with value: 0.9814.\n",
      "[I 2025-04-13 23:46:32,033] Trial 13 finished with value: 0.9815 and parameters: {'C': 3831.788765236036, 'kernel': 'poly', 'gamma': 'auto', 'degree': 2, 'coef0': 0.9284343145666472}. Best is trial 13 with value: 0.9815.\n",
      "[I 2025-04-13 23:51:48,697] Trial 14 finished with value: 0.9819 and parameters: {'C': 43510.99264408625, 'kernel': 'poly', 'gamma': 'auto', 'degree': 2, 'coef0': 0.9606445565887975}. Best is trial 14 with value: 0.9819.\n",
      "[I 2025-04-14 00:01:16,215] Trial 15 finished with value: 0.9819 and parameters: {'C': 92248.07546375034, 'kernel': 'poly', 'gamma': 'auto', 'degree': 2, 'coef0': 0.9718202506515542}. Best is trial 14 with value: 0.9819.\n",
      "[I 2025-04-14 00:08:20,979] Trial 16 finished with value: 0.9821 and parameters: {'C': 62100.68185947325, 'kernel': 'poly', 'gamma': 'auto', 'degree': 2, 'coef0': 0.9944715925878024}. Best is trial 16 with value: 0.9821.\n",
      "[I 2025-04-14 00:09:57,267] Trial 17 finished with value: 0.9822 and parameters: {'C': 17431.85057616861, 'kernel': 'poly', 'gamma': 'auto', 'degree': 3, 'coef0': 0.3860570320218555}. Best is trial 17 with value: 0.9822.\n",
      "[I 2025-04-14 00:10:59,635] Trial 18 finished with value: 0.9655 and parameters: {'C': 8.578550955143877, 'kernel': 'poly', 'gamma': 'auto', 'degree': 4, 'coef0': 0.23313281665955066}. Best is trial 17 with value: 0.9822.\n",
      "[I 2025-04-14 00:13:35,838] Trial 19 finished with value: 0.8555 and parameters: {'C': 0.0003032830868780655, 'kernel': 'poly', 'gamma': 'auto', 'degree': 3, 'coef0': 0.2422271912251741}. Best is trial 17 with value: 0.9822.\n"
     ]
    },
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "Najlepsze parametry: {'C': 17431.85057616861, 'kernel': 'poly', 'gamma': 'auto', 'degree': 3, 'coef0': 0.3860570320218555}\n"
     ]
    }
   ],
   "source": [
    "def objective(trial):\n",
    "    C = trial.suggest_float('C', 1e-5, 1e5,log=True)\n",
    "    kernel = trial.suggest_categorical('kernel', ['linear', 'poly', 'rbf', 'sigmoid'])\n",
    "    gamma = trial.suggest_categorical('gamma', ['scale', 'auto'])\n",
    "    \n",
    "    if kernel == 'poly':\n",
    "        degree = trial.suggest_int('degree', 2, 5)\n",
    "        coef0 = trial.suggest_float ('coef0', -1, 1)\n",
    "        model = SVC(C=C, kernel=kernel, gamma=gamma, degree=degree, coef0=coef0, random_state=42)\n",
    "    else:\n",
    "        model = SVC(C=C, kernel=kernel, gamma=gamma, random_state=42)\n",
    "    \n",
    "    \n",
    "    score=evaluate_model_on_folds(model)\n",
    "    return round(score,4)  \n",
    "\n",
    "study = optuna.create_study(direction='maximize')\n",
    "study.optimize(objective, n_trials=n_trials)\n",
    "\n",
    "best_params = study.best_params\n",
    "print(\"Najlepsze parametry:\", best_params)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "F1:           0.9822\n",
      "Accuracy:     0.9734\n",
      "Precision:    0.9796\n",
      "Sensitivity:  0.9849\n",
      "Specificity:  0.9392\n",
      "TP:           47656\n",
      "FP:           993\n",
      "TN:           15347\n",
      "FN:           731\n",
      "         Pred 1    Pred 0\n",
      "True 1    47656      731\n",
      "True 0     993     15347\n"
     ]
    }
   ],
   "source": [
    "best_model = SVC(**best_params, random_state=42)\n",
    "score=evaluate_model_on_folds(best_model,disp=1)"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.5"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}
